In [1]:
import os

## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')

import networkx as nx
from genesys_evaluate import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [2]:
## Conda Env pytorch-gpu on DCC
print(torch.__version__)
print(sc.__version__) 
1.13.0.post200
1.9.1
In [3]:
## Genes considered/used (shared among samples) 
gene_list = pd.read_csv('./gene_list_1108.csv')

Load Data¶

In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
    data = pickle.load(file_handle)
    
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
                         batch_size = batch_size,
                         shuffle = True, drop_last=True)
In [5]:
input_size = data['X_train'].shape[1]
## 10 cell types 
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"

Load trained GeneSys model¶

In [6]:
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"best_ALL_1130_continue.pth", map_location=torch.device('cpu')))
model = model
model.eval()
Out[6]:
ClassifierLSTM(
  (fc1): Sequential(
    (0): Linear(in_features=17513, out_features=256, bias=True)
    (1): Dropout(p=0.2, inplace=False)
    (2): GaussianNoise()
  )
  (fc): Sequential(
    (0): ReLU()
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=10, bias=True)
  )
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (b_to_z): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (bz2_infer_z1): DBlock(
    (fc1): Linear(in_features=1024, out_features=256, bias=True)
    (fc2): Linear(in_features=1024, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z1_to_z2): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z_to_x): Decoder(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=17513, bias=True)
  )
)
In [7]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [8]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [14]:
## GRN for the transition t3 to t5
for ct in cts:
    print(ct)
    cws = np.zeros((len(loader), 17513, 17513))
    with torch.no_grad():
        for i, sample in enumerate(loader):
            x = sample['x'].to(device)
            y = sample['y'].to(device)
            y_label = [num2class[i] for i in y.tolist()]
            
            pred_h = model.init_hidden(batch_size)
            tfrom = model.generate_next(x, pred_h, 3).to('cpu').detach().numpy()
            cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
            
            pred_h = model.init_hidden(batch_size)
            tto = model.generate_next(x, pred_h, 5).to('cpu').detach().numpy()   
            cto = tto[np.where(np.array(y_label)==ct)[0],:]
            
            cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
            cws[i] = cw
    
    ## Calculate mean across number of repeats
    cwm = np.mean(cws, axis=0)
    ctw[cts.index(ct)] = cwm
Atrichoblast
Trichoblast
Cortex
Endodermis
Pericycle
Procambium
Xylem
Phloem
Lateral Root Cap
Columella
In [15]:
# Save the array to disk
np.save('genesys_ctw_t3-t5.npy', ctw)
In [9]:
ctw = np.load('genesys_ctw_t3-t5.npy')
In [10]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
    ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [11]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
    ctw_f[i] = np.abs(ctw_z[i]) > threshold

Load TFs list¶

In [12]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [13]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [14]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[14]:
NAC001    1
PRE5      1
MYB118    1
MYB21     1
MYB0      1
Name: Name, dtype: int64

Network analysis¶

In [15]:
TFidx = []
for i in wanted_TFs['GeneID']:
    if i in gene_list['features'].tolist():
        TFidx.append(np.where(gene_list['features']==i)[0][0])

TFidx = np.sort(np.array(TFidx))
In [16]:
def network(i):
    ## No weights
    adj_nw = ctw_f[i]
    ## Weighted
    adj = ctw[i]*ctw_f[i]
    ## TF only
    adj = adj[np.ix_(TFidx,TFidx)]
    adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
    
    ## Remove no connect 
    regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
    taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
    ## Reciprocol
    keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
    #keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
    
    TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
    ## TF name to keep
    TFname = []
    for i in np.array(gene_list['features'][TFidx])[keepidx]:
        TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
        
    adj = adj[np.ix_(keepidx,keepidx)]
    
    # Create a NetworkX graph for non-directed edges
    G = nx.Graph()  # supports directed edges and allows for multiple edges between the same pair of nodes
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with weights
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(i, j, weight=abs(weight), distance=1/abs(weight))
            

    ## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
    closeness_centrality = nx.closeness_centrality(G, distance='distance')
    ## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
    eigenvector_centrality = nx.eigenvector_centrality(G)
    
    # Create a NetworkX graph for diected edges
    G = nx.MultiDiGraph()  # supports directed edges and allows for multiple edges between the same pair of nodes
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with weights
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(i, j, weight=weight)
    
    ## Measures the number of connections (edges) each node has
    degree_centrality = nx.degree_centrality(G)
    # Calculate outgoing centrality
    out_centrality = nx.out_degree_centrality(G)
    # Calculate incoming centrality
    in_centrality = nx.in_degree_centrality(G)
    ## Measures the extent to which a node lies on the shortest paths between other nodes.
    betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
    
    ## Non_Reciprocal Out centrality
    # Visualize the graph
    pos = nx.spring_layout(G)  # Positions of the nodes
    
    # Node colors based on weighted betweenness centrality
    node_colors = [out_centrality[node] for node in G.nodes()]
    
    # Node sizes based on weighted betweenness centrality
    node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]

    # Get the edge weights as a dictionary
    edge_weights = nx.get_edge_attributes(G, 'weight')
    edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
    
    # Scale the edge weights to desired linewidths
    max_weight = max(edge_weights.values())
    edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
    
    # Draw the graph
    nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
    # Add node labels
    labels = {node: G.nodes[node]['name'] for node in G.nodes}
    nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
    
    # Add a colorbar to show the weighted betweenness centrality color mapping
    sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
    sm.set_array([])
    plt.colorbar(sm)
    
    # Show the plot
    plt.show()
    
    dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
    oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
    ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
    bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
    cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
    ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
    df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
    df.index =TFname
    df = df.sort_values('betweenness_centrality', ascending=False)
    
    return(df)
In [17]:
atri = network(0)
In [18]:
tri = network(1)
In [19]:
cor = network(2)
In [20]:
end = network(3)
In [21]:
per = network(4)
In [22]:
pro = network(5)
In [23]:
xyl = network(6)
In [24]:
phl = network(7)
In [25]:
lrc = network(8)
In [26]:
col = network(9)
In [27]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [28]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts(), columns=['tf_occurance'])
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [29]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[29]:
atri_betweenness_centrality tri_betweenness_centrality lrc_betweenness_centrality atri_out_centrality tri_out_centrality lrc_out_centrality atri_in_centrality tri_in_centrality lrc_in_centrality centrality_count centrality_sum
ATS 0.877999 0.092757 0.732649 0.203125 0.231231 0.210769 0.239583 0.136637 0.260000 9 11.984750
GL2 0.858845 0.170035 0.274287 0.750000 0.087087 0.229231 0.364583 0.013514 0.055385 9 11.802966
TTG2 0.891047 0.001147 0.011229 0.494792 0.061562 0.260000 0.645833 0.012012 0.027692 9 11.405314
ATCTH 0.049676 0.009397 0.255351 0.216146 0.126126 0.058462 0.067708 0.039039 0.176923 9 9.998829
In [30]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[30]:
atri_betweenness_centrality tri_betweenness_centrality atri_out_centrality tri_out_centrality atri_in_centrality tri_in_centrality centrality_count centrality_sum
AT3G53370 0.113217 0.920005 0.127604 0.136637 0.026042 0.899399 6 8.222903
RHD6 0.683060 0.792994 0.020833 0.337838 0.062500 0.226727 6 8.123952
AT2G37120 0.014401 0.783233 0.158854 0.169670 0.140625 0.358859 6 7.625642
AT4G09100 0.269807 0.851683 0.007812 0.039039 0.026042 0.373874 6 7.568257
ARR5 0.066614 0.856897 0.013021 0.016517 0.177083 0.114114 6 7.244245
OFP13 0.016285 0.003563 0.221354 0.145646 0.080729 0.159159 6 6.626736
In [31]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[31]:
atri_betweenness_centrality atri_out_centrality atri_in_centrality centrality_count centrality_sum
OFP18 0.790293 0.059896 0.138021 3 3.988210
TRY 0.375238 0.393229 0.072917 3 3.841384
AT3G05860 0.357002 0.005208 0.291667 3 3.653877
CRF4 0.310569 0.192708 0.083333 3 3.586611
KAN 0.113747 0.018229 0.067708 3 3.199685
MEA 0.004909 0.005208 0.210938 3 3.221055
DAR7 0.002353 0.026042 0.070312 3 3.098707
In [32]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[32]:
tri_betweenness_centrality tri_out_centrality tri_in_centrality centrality_count centrality_sum
LRL3 0.944298 0.049550 0.453453 3 4.447301
RSL4 0.855813 0.084084 0.507508 3 4.447405
RSL2 0.648536 0.004505 0.141141 3 3.794181
ATMYC1 0.590512 0.258258 0.160661 3 4.009431
AT5G44260 0.583025 0.201201 0.064565 3 3.848791
AT2G28710 0.558238 0.025526 0.052553 3 3.636316
AT2G38090 0.415713 0.027027 0.019520 3 3.462259
AT5G06800 0.353225 0.028529 0.084084 3 3.465838
ESE3 0.292312 0.144144 0.099099 3 3.535555
AT3G09735 0.213200 0.184685 0.058559 3 3.456443
RL6 0.176197 0.055556 0.043544 3 3.275296
RSL1 0.162733 0.147147 0.042042 3 3.351923
BPC5 0.054930 0.045045 0.013514 3 3.113489
LSD1 0.032566 0.012012 0.030030 3 3.074608
AT4G39160 0.031048 0.031532 0.196697 3 3.259277
WRKY61 0.009824 0.004505 0.096096 3 3.110425
EGL3 0.003059 0.103604 0.025526 3 3.132189
AT2G18670 0.001499 0.010511 0.070571 3 3.082580
DOT2 0.000494 0.016517 0.034535 3 3.051546
GL3 0.000404 0.051051 0.033033 3 3.084488
AT5G11340 0.000244 0.052553 0.019520 3 3.072316
AT4G31650 0.000052 0.051051 0.018018 3 3.069121
AT2G05160 0.000007 0.021021 0.129129 3 3.150157
In [33]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[33]:
lrc_betweenness_centrality lrc_out_centrality lrc_in_centrality centrality_count centrality_sum
ERF9 0.716722 0.126154 0.269231 3 4.112106
NAC094 0.637973 0.072308 0.272308 3 3.982589
AT1G49475 0.627647 0.118462 0.187692 3 3.933801
RITF1 0.589482 0.015385 0.249231 3 3.854097
LBD4 0.572528 0.104615 0.216923 3 3.894067
GATA17L 0.572180 0.090769 0.127692 3 3.790641
FEZ 0.507221 0.093846 0.175385 3 3.776451
CHR38 0.490551 0.078462 0.132308 3 3.701320
RGL2 0.367365 0.020000 0.089231 3 3.476596
AT2G46160 0.321716 0.061538 0.075385 3 3.458639
ANL2 0.274839 0.146154 0.121538 3 3.542532
AT3G60670 0.195150 0.056923 0.078462 3 3.330535
AT1G31760 0.189767 0.047692 0.050769 3 3.288228
CSDP1 0.174387 0.035385 0.093846 3 3.303617
BBX29 0.117208 0.156923 0.066154 3 3.340284
GATA17 0.099588 0.246154 0.058462 3 3.404203
ARF8 0.082930 0.035385 0.116923 3 3.235238
TLP6 0.053002 0.035385 0.136923 3 3.225310
WRKY27 0.050722 0.144615 0.063077 3 3.258414
PYE 0.005535 0.136923 0.033846 3 3.176304
AT1G19000 0.005519 0.158462 0.018462 3 3.182442
HB4 0.000941 0.047692 0.020000 3 3.068633
NAC052 0.000868 0.061538 0.050769 3 3.113175
KNAT3 0.000505 0.055385 0.060000 3 3.115890
AGL94 0.000491 0.015385 0.060000 3 3.075875
WRKY35 0.000100 0.018462 0.090769 3 3.109330
In [34]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[34]:
col_betweenness_centrality col_out_centrality col_in_centrality centrality_count centrality_sum
AT1G77570 0.830874 0.010654 0.010654 3 3.852183
AT3G12977 0.727953 0.027397 0.368341 3 4.123691
WRKY26 0.675799 0.065449 0.316591 3 4.057839
AT2G36930 0.381861 0.013699 0.004566 3 3.400126
AT1G01260 0.176699 0.053272 0.057839 3 3.287810
NLP7 0.073721 0.077626 0.245053 3 3.396399
AT5G23405 0.060781 0.010654 0.068493 3 3.139928
WRKY14 0.032502 0.031963 0.357686 3 3.422152
GRF2 0.029613 0.004566 0.012177 3 3.046356
WRKY4 0.023184 0.018265 0.115677 3 3.157126
AT5G65910 0.017084 0.112633 0.133942 3 3.263659
STOP1 0.015972 0.101979 0.202435 3 3.320386
HSF A4A 0.012603 0.012177 0.179604 3 3.204384
TRP1 0.012137 0.009132 0.095890 3 3.117160
AT5G06110 0.010826 0.015221 0.124810 3 3.150857
SCL1 0.007592 0.044140 0.240487 3 3.292219
AT3G02890 0.007559 0.039574 0.089802 3 3.136935
IAA20 0.006466 0.018265 0.080670 3 3.105401
TGA4 0.005555 0.031963 0.147641 3 3.185159
BBX28 0.004552 0.001522 0.045662 3 3.051736
EIN3 0.004116 0.100457 0.213090 3 3.317663
E2F1 0.003042 0.022831 0.062405 3 3.088278
AT3G08505 0.003037 0.004566 0.092846 3 3.100450
SNL6 0.003016 0.024353 0.050228 3 3.077598
MBD10 0.003000 0.006088 0.035008 3 3.044096
MYB51 0.002986 0.009132 0.010654 3 3.022773
DRIP2 0.001768 0.150685 0.082192 3 3.234645
ATRX 0.001768 0.210046 0.083714 3 3.295528
ERF73 0.001520 0.003044 0.003044 3 3.007608
AT2G29065 0.001520 0.004566 0.031963 3 3.038049
KIWI 0.001517 0.004566 0.025875 3 3.031959
AT2G20110 0.001510 0.004566 0.018265 3 3.024342
AT5G41020 0.001510 0.013699 0.031963 3 3.047173
NAC082 0.001508 0.028919 0.031963 3 3.062391
ALY2 0.001497 0.016743 0.018265 3 3.036504
SPL12 0.001360 0.178082 0.042618 3 3.222060
NAM 0.000722 0.027397 0.322679 3 3.350798
emb1967 0.000719 0.013699 0.007610 3 3.022028
AT2G27580 0.000503 0.022831 0.033486 3 3.056820
PRR7 0.000074 0.012177 0.162861 3 3.175112
SDG2 0.000044 0.126332 0.068493 3 3.194869
AT1G21780 0.000030 0.015221 0.141553 3 3.156803
RGD3 0.000009 0.018265 0.013699 3 3.031973
CHR11 0.000005 0.132420 0.173516 3 3.305941
In [35]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[35]:
cor_betweenness_centrality end_betweenness_centrality cor_out_centrality end_out_centrality cor_in_centrality end_in_centrality centrality_count centrality_sum
JKD 0.061129 0.006195 0.415459 0.188084 0.693237 0.322430 6 7.686534
RAX2 0.000006 0.003555 0.239130 0.102804 0.016908 0.943925 6 7.306329
BLJ 0.000216 0.013515 0.166667 0.108645 0.012077 0.892523 6 7.193643
AT1G72210 0.019002 0.000033 0.219807 0.094626 0.328502 0.019860 6 6.681830
AT3G24120 0.000316 0.021290 0.144928 0.127336 0.089372 0.191589 6 6.574831
RGL3 0.004884 0.000167 0.089372 0.029206 0.355072 0.001168 6 6.479869
SCR 0.003614 0.003834 0.118357 0.043224 0.002415 0.286215 6 6.457661
AT5G59450 0.000678 0.000020 0.065217 0.035047 0.004831 0.174065 6 6.279859
COL4 0.000012 0.000004 0.086957 0.067757 0.084541 0.035047 6 6.274317
ERF15 0.000012 0.000046 0.118357 0.036215 0.021739 0.080607 6 6.256977
IDD4 0.000041 0.000135 0.065217 0.052570 0.106280 0.010514 6 6.234758
AT4G36860 0.000292 0.000572 0.053140 0.086449 0.021739 0.072430 6 6.234623
AT1G05710 0.001023 0.000164 0.060386 0.093458 0.021739 0.044393 6 6.221164
LAF1 0.000006 0.000038 0.067633 0.082944 0.021739 0.043224 6 6.215584
AT2G44730 0.000012 0.000008 0.048309 0.057243 0.024155 0.053738 6 6.183465
ETR2 0.000883 0.000078 0.065217 0.014019 0.082126 0.018692 6 6.181014
HRS1 0.000585 0.000045 0.055556 0.030374 0.021739 0.072430 6 6.180728
JAZ12 0.000175 0.000001 0.041063 0.051402 0.024155 0.058411 6 6.175207
AT1G64380 0.000012 0.000096 0.050725 0.045561 0.026570 0.017523 6 6.140486
HMG 0.000006 0.000070 0.048309 0.029206 0.053140 0.007009 6 6.137740
AT1G68070 0.000047 0.000022 0.053140 0.037383 0.024155 0.014019 6 6.128765
HY5 0.000053 0.000003 0.024155 0.032710 0.067633 0.003505 6 6.128058
HTA13 0.000006 0.000118 0.038647 0.037383 0.016908 0.033879 6 6.126941
HK2 0.000064 0.000034 0.038647 0.031542 0.041063 0.015187 6 6.126538
AT1G25550 0.001222 0.000398 0.036232 0.039720 0.012077 0.035047 6 6.124695
AT3G06410 0.000298 0.000004 0.043478 0.042056 0.026570 0.011682 6 6.124089
RR3 0.000269 0.000034 0.050725 0.026869 0.033816 0.007009 6 6.118723
AT1G04990 0.000181 0.000014 0.031401 0.025701 0.053140 0.008178 6 6.118615
AL1 0.000199 0.000268 0.043478 0.039720 0.024155 0.008178 6 6.115997
RVN 0.000006 0.000051 0.028986 0.053738 0.014493 0.011682 6 6.108955
AT5G47390 0.000006 0.000742 0.043478 0.026869 0.014493 0.022196 6 6.107784
AT1G58110 0.000889 0.000003 0.024155 0.039720 0.021739 0.016355 6 6.102860
LZF1 0.000023 0.000005 0.031401 0.019860 0.048309 0.002336 6 6.101935
AT3G16280 0.000012 0.000392 0.038647 0.015187 0.012077 0.030374 6 6.096689
AP2 0.000023 0.000415 0.033816 0.030374 0.024155 0.007009 6 6.095793
DBP1 0.000175 0.000489 0.026570 0.025701 0.033816 0.008178 6 6.094930
BBX21 0.000006 0.000004 0.024155 0.038551 0.016908 0.010514 6 6.090138
AT3G17100 0.000240 0.000275 0.016908 0.032710 0.019324 0.016355 6 6.085812
MBD7 0.000012 0.000134 0.026570 0.031542 0.012077 0.008178 6 6.078513
ARR9 0.000018 0.000033 0.016908 0.032710 0.009662 0.016355 6 6.075686
MYB70 0.000298 0.000048 0.024155 0.028037 0.004831 0.017523 6 6.074892
AT2G47850 0.000099 0.000018 0.016908 0.030374 0.014493 0.005841 6 6.067733
AT2G03470 0.000304 0.000146 0.007246 0.032710 0.009662 0.011682 6 6.061751
ABA1 0.000041 0.000007 0.019324 0.028037 0.009662 0.004673 6 6.061744
LUH 0.000012 0.000082 0.009662 0.024533 0.012077 0.007009 6 6.053375
ZAP1 0.000117 0.000005 0.004831 0.025701 0.004831 0.010514 6 6.045999
AT4G22360 0.000041 0.000030 0.014493 0.015187 0.002415 0.005841 6 6.038007
In [36]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
cor_betweenness_centrality cor_out_centrality cor_in_centrality centrality_count centrality_sum
AT2G38300 0.017423 0.086957 0.613527 3 3.717906
MYB86 0.014487 0.101449 0.280193 3 3.396129
AT2G46810 0.003363 0.002415 0.014493 3 3.020271
ARF6 0.000959 0.036232 0.009662 3 3.046853
WRKY69 0.000924 0.074879 0.106280 3 3.182083
AT2G42660 0.000772 0.062802 0.275362 3 3.338936
SYD 0.000673 0.021739 0.012077 3 3.034489
BRM 0.000643 0.031401 0.019324 3 3.051368
AT1G76880 0.000491 0.004831 0.016908 3 3.022230
AT3G49930 0.000415 0.004831 0.024155 3 3.029401
ERF3 0.000374 0.024155 0.019324 3 3.043853
AT5G28300 0.000322 0.002415 0.016908 3 3.019645
AT4G16150 0.000298 0.019324 0.012077 3 3.031699
AT1G63490 0.000298 0.016908 0.012077 3 3.029284
TGA3 0.000298 0.009662 0.012077 3 3.022037
ADA2B 0.000298 0.012077 0.007246 3 3.019622
BZIP60 0.000292 0.031401 0.021739 3 3.053433
AT5G07810 0.000240 0.019324 0.024155 3 3.043718
EBS 0.000205 0.026570 0.019324 3 3.046098
GLK2 0.000187 0.036232 0.050725 3 3.087144
EPR1 0.000146 0.016908 0.016908 3 3.033963
AT2G37650 0.000117 0.004831 0.009662 3 3.014610
AT3G16350 0.000094 0.028986 0.045894 3 3.074973
SPL1 0.000088 0.028986 0.024155 3 3.053228
EFS 0.000088 0.009662 0.009662 3 3.019411
BZIP28 0.000076 0.021739 0.016908 3 3.038723
ALY1 0.000047 0.012077 0.002415 3 3.014540
JAZ11 0.000041 0.004831 0.002415 3 3.007287
EMB2773 0.000041 0.009662 0.002415 3 3.012118
SNL3 0.000041 0.012077 0.002415 3 3.014534
GBF1 0.000041 0.007246 0.002415 3 3.009703
RFI2 0.000035 0.007246 0.014493 3 3.021774
NF-YC4 0.000035 0.009662 0.016908 3 3.026605
tny 0.000023 0.014493 0.007246 3 3.021763
AT2G40620 0.000012 0.033816 0.026570 3 3.060398
SIGF 0.000012 0.007246 0.019324 3 3.026582
RHC1A 0.000006 0.007246 0.004831 3 3.012083
AT5G12850 0.000006 0.043478 0.045894 3 3.089378
AT1G76350 0.000006 0.021739 0.014493 3 3.036238
In [37]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[37]:
end_betweenness_centrality end_out_centrality end_in_centrality centrality_count centrality_sum
AT2G43140 0.145186 0.010514 0.015187 3 3.170887
bZIP58 0.026392 0.014019 0.004673 3 3.045084
AT4G00940 0.013118 0.023364 0.007009 3 3.043492
chr31 0.007627 0.130841 0.336449 3 3.474917
BIB 0.002820 0.024533 0.146028 3 3.173381
... ... ... ... ... ...
NAC78 0.000003 0.033879 0.017523 3 3.051405
AGL26 0.000003 0.018692 0.001168 3 3.019863
RAT5 0.000003 0.037383 0.014019 3 3.051405
AGL16 0.000001 0.035047 0.063084 3 3.098132
NGA3 0.000001 0.032710 0.112150 3 3.144861

80 rows × 5 columns

In [38]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[38]:
per_betweenness_centrality pro_betweenness_centrality xyl_betweenness_centrality phl_betweenness_centrality per_out_centrality pro_out_centrality xyl_out_centrality phl_out_centrality per_in_centrality pro_in_centrality xyl_in_centrality phl_in_centrality centrality_count centrality_sum
AT3G43430 0.814337 0.908875 0.190519 0.786436 0.369617 0.464567 0.315412 0.612551 0.525120 0.854893 0.218638 0.510232 12 18.571197
HB-8 0.680003 0.765353 0.963126 0.156584 0.062201 0.186727 0.523297 0.285130 0.074163 0.583802 0.695341 0.061392 12 17.037118
IAA12 0.649582 0.854891 0.841688 0.253560 0.077751 0.228346 0.397849 0.107776 0.088517 0.689539 0.215054 0.113233 12 16.517786
MYB20 0.569639 0.575999 0.296060 0.067264 0.521531 0.410574 0.419355 0.230559 0.409091 0.682790 0.154122 0.053206 12 16.390189
ATAUX2-11 0.005717 0.684229 0.661497 0.000129 0.062201 0.133858 0.523297 0.090041 0.084928 0.611924 0.602151 0.136426 12 15.596397
HB40 0.006737 0.243082 0.538369 0.575910 0.078947 0.258718 0.311828 0.472033 0.021531 0.092238 0.286738 0.529332 12 15.415464
TCP15 0.572032 0.610118 0.076984 0.354662 0.043062 0.334083 0.240143 0.080491 0.050239 0.518560 0.179211 0.256480 12 15.316067
RMA2 0.003988 0.416876 0.091849 0.673516 0.039474 0.168729 0.103943 0.312415 0.108852 0.102362 0.268817 0.510232 12 14.801052
IAA11 0.318345 0.000507 0.370272 0.066824 0.133971 0.015748 0.433692 0.238745 0.129187 0.076490 0.555556 0.244202 12 14.583538
AT5G50010 0.535855 0.614196 0.004396 0.121603 0.078947 0.109111 0.143369 0.046385 0.080144 0.508436 0.125448 0.096862 12 14.464754
IAA26 0.000586 0.569165 0.000168 0.296964 0.029904 0.246344 0.139785 0.189632 0.051435 0.410574 0.189964 0.233288 12 14.357809
AT1G69580 0.060731 0.492127 0.022292 0.349406 0.065789 0.175478 0.304659 0.136426 0.096890 0.163105 0.164875 0.272851 12 14.304630
MYB88 0.125035 0.517102 0.024174 0.006743 0.137560 0.195726 0.304659 0.184175 0.047847 0.192351 0.258065 0.068213 12 14.061649
TCP14 0.388048 0.380605 0.002411 0.001053 0.078947 0.076490 0.261649 0.088677 0.069378 0.490439 0.075269 0.130969 12 14.043935
AT2G29660 0.001679 0.004497 0.251837 0.016848 0.043062 0.011249 0.197133 0.065484 0.055024 0.284589 0.075269 0.155525 12 13.162196
In [39]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[39]:
per_betweenness_centrality per_out_centrality per_in_centrality centrality_count centrality_sum
LBD14 0.735968 0.175837 0.110048 3 4.021853
IDD11 0.681007 0.245215 0.167464 3 4.093687
bHLH34 0.489293 0.062201 0.017943 3 3.569437
GATA23 0.398047 0.113636 0.096890 3 3.608574
HDA3 0.395608 0.052632 0.090909 3 3.539148
MGP 0.205249 0.105263 0.063397 3 3.373909
AT3G21330 0.140299 0.385167 0.027512 3 3.552978
AT1G01640 0.095084 0.075359 0.050239 3 3.220682
ATWHY2 0.004406 0.068182 0.081340 3 3.153928
AT5G13780 0.002461 0.047847 0.031100 3 3.081408
AT4G20970 0.001332 0.040670 0.047847 3 3.089849
TRFL10 0.000556 0.019139 0.064593 3 3.084288
AL4 0.000163 0.019139 0.028708 3 3.048010
GAI 0.000049 0.049043 0.025120 3 3.074211
LBD29 0.000001 0.017943 0.050239 3 3.068183
In [40]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[40]:
pro_betweenness_centrality pro_out_centrality pro_in_centrality centrality_count centrality_sum
HAT3 0.407458 0.101237 0.163105 3 3.671799
SHY2 0.392416 0.142857 0.260967 3 3.796241
MYB10 0.370478 0.130484 0.174353 3 3.675315
AT5G60142 0.365109 0.070866 0.107987 3 3.543962
AT1G75490 0.301645 0.275591 0.119235 3 3.696470
AT2G40200 0.273545 0.118110 0.130484 3 3.522139
AT2G36340 0.239034 0.068616 0.053993 3 3.361644
GRF9 0.022244 0.086614 0.046119 3 3.154977
AT2G45460 0.014826 0.067492 0.075366 3 3.157683
AT1G26590 0.009573 0.106862 0.041620 3 3.158054
AT3G20640 0.004052 0.067492 0.079865 3 3.151409
AT1G69570 0.003779 0.064117 0.046119 3 3.114015
AT5G51790 0.002250 0.028121 0.128234 3 3.158605
SPL2 0.001390 0.062992 0.078740 3 3.143122
RGL1 0.001125 0.032621 0.098988 3 3.132733
WRKY19 0.000343 0.102362 0.025872 3 3.128577
AT3G10040 0.000077 0.116985 0.017998 3 3.135060
ZFP7 0.000057 0.058493 0.051744 3 3.110293
ARF4 0.000005 0.064117 0.057368 3 3.121490
In [41]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[41]:
xyl_betweenness_centrality xyl_out_centrality xyl_in_centrality centrality_count centrality_sum
AT1G68200 0.967381 0.401434 0.609319 3 4.978134
LBD31 0.966414 0.354839 0.559140 3 4.880392
MYB52 0.960071 0.182796 0.580645 3 4.723512
MYB85 0.950337 0.268817 0.455197 3 4.674351
AT1G66810 0.901872 0.433692 0.666667 3 5.002230
MYB46 0.899732 0.566308 0.842294 3 5.308334
LBD18 0.891738 0.394265 0.677419 3 4.963423
XND1 0.844434 0.222222 0.702509 3 4.769165
BLH3 0.786029 0.121864 0.480287 3 4.388180
VND2 0.784276 0.333333 0.795699 3 4.913308
ZHD3 0.774490 0.577061 0.666667 3 5.018218
AP3 0.734109 0.544803 0.426523 3 4.705436
IAA6 0.719412 0.240143 0.724014 3 4.683569
MYB83 0.704920 0.551971 0.741935 3 4.998827
VND7 0.702780 0.405018 0.799283 3 4.907081
VND4 0.693174 0.487455 0.806452 3 4.987081
VND5 0.685052 0.261649 0.756272 3 4.702973
BEL10 0.677522 0.107527 0.437276 3 4.222325
SHP1 0.622057 0.372760 0.258065 3 4.252882
HB31 0.611885 0.598566 0.301075 3 4.511526
HAT14 0.604097 0.394265 0.426523 3 4.424886
VND3 0.568023 0.415771 0.781362 3 4.765156
VND1 0.542302 0.344086 0.519713 3 4.406101
TCP20 0.338658 0.129032 0.225806 3 3.693497
AT1G24040 0.281517 0.068100 0.179211 3 3.528829
JLO 0.267528 0.197133 0.634409 3 4.099069
DOF1 0.152923 0.222222 0.215054 3 3.590199
AT1G29950 0.137426 0.573477 0.057348 3 3.768250
BZIP49 0.124417 0.222222 0.121864 3 3.468503
VND6 0.117957 0.103943 0.229391 3 3.451291
MYB99 0.063820 0.283154 0.240143 3 3.587117
FBH1 0.061306 0.168459 0.369176 3 3.598940
AT5G04390 0.054679 0.032258 0.154122 3 3.241059
TLP2 0.023452 0.146953 0.168459 3 3.338864
AT2G22200 0.021222 0.035842 0.093190 3 3.150254
AGL58 0.020680 0.433692 0.032258 3 3.486630
PIF4 0.017792 0.050179 0.232975 3 3.300946
AT5G25470 0.014221 0.060932 0.086022 3 3.161174
HB34 0.010688 0.107527 0.168459 3 3.286674
AT4G16610 0.006756 0.107527 0.211470 3 3.325752
HSL1 0.004500 0.215054 0.254480 3 3.474034
AT1G24610 0.003584 0.082437 0.179211 3 3.265233
AT3G19080 0.003584 0.050179 0.064516 3 3.118280
AT1G26610 0.003584 0.168459 0.146953 3 3.318996
AT3G22560 0.002076 0.021505 0.168459 3 3.192040
PKL 0.000232 0.193548 0.025090 3 3.218870
OBP4 0.000026 0.107527 0.086022 3 3.193574
AT5G09460 0.000026 0.086022 0.125448 3 3.211495
AT1G03350 0.000013 0.182796 0.053763 3 3.236572
In [42]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[42]:
phl_betweenness_centrality phl_out_centrality phl_in_centrality centrality_count centrality_sum
HCA2 0.755483 0.125512 0.461119 3 4.342113
AT5G41380 0.694792 0.238745 0.590723 3 4.524260
REM22 0.605331 0.200546 0.289222 3 4.095099
NAC057 0.589463 0.349250 0.523874 3 4.462587
NAC2 0.577023 0.199181 0.459754 3 4.235959
GATA20 0.563876 0.199181 0.300136 3 4.063194
CRF1 0.374751 0.249659 0.316508 3 3.940918
WRKY32 0.314265 0.049113 0.233288 3 3.596667
VOZ1 0.308561 0.083220 0.218281 3 3.610061
NAC086 0.257731 0.189632 0.167804 3 3.615166
SOL1 0.221548 0.156889 0.136426 3 3.514863
SVP 0.208957 0.201910 0.137790 3 3.548657
AGL15 0.170137 0.226467 0.156889 3 3.553493
KAN2 0.165096 0.043656 0.219645 3 3.428397
AT3G46070 0.047807 0.043656 0.027285 3 3.118748
GATA19 0.034174 0.126876 0.081855 3 3.242905
BPEp 0.014312 0.150068 0.035471 3 3.199851
PIF7 0.009350 0.092769 0.066849 3 3.168968
AT1G47570 0.002701 0.192360 0.099591 3 3.294651
HSFA8 0.001142 0.001364 0.012278 3 3.014785
BRH1 0.000835 0.137790 0.122783 3 3.261408
AT1G02030 0.000071 0.117326 0.028649 3 3.146046

Search for individual genes¶

In [43]:
gene = 'SHR'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[43]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality per_degree_centrality per_out_centrality per_in_centrality per_betweenness_centrality ... xyl_out_centrality xyl_in_centrality xyl_betweenness_centrality xyl_closeness_centrality xyl_eigenvector_centrality phl_degree_centrality phl_out_centrality phl_in_centrality phl_closeness_centrality phl_eigenvector_centrality
SHR 3.0 0.028037 0.023364 0.004673 0.000457 0.022229 0.191388 0.125598 0.065789 0.456631 ... 0.290323 0.014337 0.000077 0.001068 0.059776 0.040928 0.038199 0.002729 0.000703 0.015468

1 rows × 29 columns

In [44]:
gene = 'BLJ'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[44]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality per_degree_centrality per_out_centrality per_in_centrality per_closeness_centrality per_eigenvector_centrality
BLJ 2.0 0.178744 0.166667 0.012077 0.000216 0.000299 0.075978 1.001168 0.108645 0.892523 0.013515 0.000628 0.16166 0.005981 0.003589 0.002392 0.0002 0.005208
In [45]:
gene = 'JKD'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[45]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
JKD 2.0 1.108696 0.415459 0.693237 0.061129 0.000347 0.160313 0.510514 0.188084 0.32243 0.006195 0.000676 0.102066
In [46]:
gene = 'RVN'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[46]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
RVN 2.0 0.043478 0.028986 0.014493 0.000006 0.000203 0.028298 0.065421 0.053738 0.011682 0.000051 0.000533 0.037707
In [47]:
gene = 'BIB'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[47]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
BIB 1.0 0.170561 0.024533 0.146028 0.00282 0.00037 0.064051
In [48]:
gene = 'IME'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[48]:
In [49]:
gene = 'MYB66'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[49]:
In [50]:
gene = 'GL2'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[50]:
tf_occurance atri_degree_centrality atri_out_centrality atri_in_centrality atri_betweenness_centrality atri_closeness_centrality atri_eigenvector_centrality tri_degree_centrality tri_out_centrality tri_in_centrality tri_betweenness_centrality tri_closeness_centrality tri_eigenvector_centrality lrc_degree_centrality lrc_out_centrality lrc_in_centrality lrc_betweenness_centrality lrc_closeness_centrality lrc_eigenvector_centrality
GL2 3.0 1.114583 0.75 0.364583 0.858845 0.000732 0.179213 0.100601 0.087087 0.013514 0.170035 0.000961 0.054577 0.284615 0.229231 0.055385 0.274287 0.000573 0.083088
In [51]:
tf_spec.to_csv('TF_GRN_centrality_t3-t5_zscore3.csv', index=True)
In [ ]: